Data Cleaning

manhattan <- read.csv("https://raw.githubusercontent.com/lcao21/GIS3_Final_Project/master/Data/manhattan.csv", stringsAsFactors=FALSE)
head(manhattan)
##   BOROUGH  NEIGHBORHOOD        BUILDING.CLASS.CATEGORY TAX.CLASS.AT.PRESENT
## 1       1 ALPHABET CITY        01 ONE FAMILY DWELLINGS                    1
## 2       1 ALPHABET CITY        01 ONE FAMILY DWELLINGS                    1
## 3       1 ALPHABET CITY        02 TWO FAMILY DWELLINGS                    1
## 4       1 ALPHABET CITY      03 THREE FAMILY DWELLINGS                    1
## 5       1 ALPHABET CITY      03 THREE FAMILY DWELLINGS                    1
## 6       1 ALPHABET CITY 07 RENTALS - WALKUP APARTMENTS                    2
##   BLOCK LOT EASE.MENT BUILDING.CLASS.AT.PRESENT              ADDRESS
## 1   376  43        NA                        S1  743 EAST 6TH STREET
## 2   390  61        NA                        A4  189 EAST 7TH STREET
## 3   404   1        NA                        B9         166 AVENUE A
## 4   377  56        NA                        C0  263 EAST 7TH STREET
## 5   393   9        NA                        C0 604 EAST 11TH STREET
## 6   372  23        NA                        C1  300 EAST 3RD STREET
##   APARTMENT.NUMBER ZIP.CODE RESIDENTIAL.UNITS COMMERCIAL.UNITS TOTAL.UNITS
## 1                     10009                 1                1           2
## 2                     10009                 1                0           1
## 3                     10009                 2                0           2
## 4                     10009                 3                0           3
## 5                     10009                 3                0           3
## 6                     10009                12                0          12
##   LAND.SQUARE.FEET GROSS.SQUARE.FEET YEAR.BUILT TAX.CLASS.AT.TIME.OF.SALE
## 1            2,090             3,680       1940                         1
## 2              987             2,183       1860                         1
## 3            1,510             4,520       1900                         1
## 4            2,430             3,600       1899                         1
## 5            2,375             5,110       1939                         1
## 6            2,393             7,989       2001                         2
##   BUILDING.CLASS.AT.TIME.OF.SALE SALE.PRICE SALE.DATE
## 1                             S1  3,200,000   7/24/19
## 2                             A4          0   9/25/19
## 3                             B9          0   7/22/19
## 4                             C0  6,300,000   4/30/19
## 5                             C0          0  10/24/19
## 6                             C1  1,950,000    8/8/19

Remove unimportant columns

drops <- c("BOROUGH","EASE.MENT", "TAX.CLASS.AT.PRESENT", "BUILDING.CLASS.AT.PRESENT", "APARTMENT.NUMBER")
manhattan <- manhattan[ , !(names(manhattan) %in% drops)]
head(manhattan)
##    NEIGHBORHOOD        BUILDING.CLASS.CATEGORY BLOCK LOT              ADDRESS
## 1 ALPHABET CITY        01 ONE FAMILY DWELLINGS   376  43  743 EAST 6TH STREET
## 2 ALPHABET CITY        01 ONE FAMILY DWELLINGS   390  61  189 EAST 7TH STREET
## 3 ALPHABET CITY        02 TWO FAMILY DWELLINGS   404   1         166 AVENUE A
## 4 ALPHABET CITY      03 THREE FAMILY DWELLINGS   377  56  263 EAST 7TH STREET
## 5 ALPHABET CITY      03 THREE FAMILY DWELLINGS   393   9 604 EAST 11TH STREET
## 6 ALPHABET CITY 07 RENTALS - WALKUP APARTMENTS   372  23  300 EAST 3RD STREET
##   ZIP.CODE RESIDENTIAL.UNITS COMMERCIAL.UNITS TOTAL.UNITS LAND.SQUARE.FEET
## 1    10009                 1                1           2            2,090
## 2    10009                 1                0           1              987
## 3    10009                 2                0           2            1,510
## 4    10009                 3                0           3            2,430
## 5    10009                 3                0           3            2,375
## 6    10009                12                0          12            2,393
##   GROSS.SQUARE.FEET YEAR.BUILT TAX.CLASS.AT.TIME.OF.SALE
## 1             3,680       1940                         1
## 2             2,183       1860                         1
## 3             4,520       1900                         1
## 4             3,600       1899                         1
## 5             5,110       1939                         1
## 6             7,989       2001                         2
##   BUILDING.CLASS.AT.TIME.OF.SALE SALE.PRICE SALE.DATE
## 1                             S1  3,200,000   7/24/19
## 2                             A4          0   9/25/19
## 3                             B9          0   7/22/19
## 4                             C0  6,300,000   4/30/19
## 5                             C0          0  10/24/19
## 6                             C1  1,950,000    8/8/19

Keep only rows for family dwellings

library(data.table)
manhattan <- manhattan[manhattan$BUILDING.CLASS.CATEGORY %like% "FAMILY", ]

Keep only rows that have non-zero values for sale price

manhattan <-manhattan[!(manhattan$"SALE.PRICE"== 0),]

replace_commas<-function(x){
  x<-as.numeric(gsub("\\,", "", x))
}

manhattan$SALE.PRICE <- replace_commas(manhattan$SALE.PRICE)
manhattan$SALE.PRICE <- as.numeric(manhattan$SALE.PRICE)

Geocoding

Clean addresses

library(stringr)
# Convert addresses to type character
manhattan$ADDRESS <- as.character(manhattan$ADDRESS)

# Remove anything before a dash, before a slash, and after a comma
manhattan$ADDRESS <- gsub(".*-","", manhattan$ADDRESS)
manhattan$ADDRESS <- gsub(".*/","", manhattan$ADDRESS)
manhattan$ADDRESS <- gsub("(.*),.*", "\\1", manhattan$ADDRESS)

# Remove rows with address lengths less than 3 
manhattan$address_length <- c(str_count(manhattan$ADDRESS))
manhattan <- manhattan[!(manhattan$address_length < 3),]

# Add ", Manhattan" to each address
manhattan$ADDRESS <- paste(manhattan$ADDRESS, ", Manhattan", sep="")
library(ggmap)
library(OpenStreetMap)
library(tmaptools)

geocoded_addresses <- geocode_OSM(manhattan$ADDRESS, as.data.frame = TRUE)
geocoded_addresses <- geocoded_addresses[c("query","lat", "lon")]
head(geocoded_addresses)
write.csv(geocoded_addresses,"geocoded_addresses.csv", row.names = FALSE)

Since running the above chunk took a very long time, I exported the dataframe output as a csv called “geocoded_addresses.csv”, so that I can use the csv whenever I need to re-run the markdown file without having to wait >= 5 minutes.

geocoded_addresses <- read.csv("https://raw.githubusercontent.com/lcao21/GIS3_Final_Project/master/Data/geocoded_addresses.csv")
head(geocoded_addresses)
##                             query      lat       lon
## 1  743 EAST 6TH STREET, Manhattan 40.72290 -73.97740
## 2  189 EAST 7TH STREET, Manhattan 40.72493 -73.98051
## 3         166 AVENUE A, Manhattan 40.72811 -73.98174
## 4  263 EAST 7TH STREET, Manhattan 40.72363 -73.97741
## 5 604 EAST 11TH STREET, Manhattan 40.72712 -73.97916
## 6 483 WEST 22ND STREET, Manhattan 40.74700 -74.00411

The geocode_OSM() function couldn’t find a few addresses that aren’t huge losses so I’ll ignore those lost rows and merge the geocoded_addresses df with my manhattan df. In the case, since manhattan has more rows than geocoded_addresses, I conduct an “inner join” with merge():

library(dplyr)

# Inner join 
manhattan <- merge(manhattan, geocoded_addresses, by.x = "ADDRESS", by.y = "query")
# Drop address_length column
manhattan <- manhattan[ , !(names(manhattan) %in% c("address_length"))]
head(manhattan)
##                           ADDRESS              NEIGHBORHOOD
## 1       1 SUTTON PLACE, Manhattan              MIDTOWN EAST
## 2 106 EAST 78TH STREET, Manhattan   UPPER EAST SIDE (59-79)
## 3 109 WEST 11TH STREET, Manhattan    GREENWICH VILLAGE-WEST
## 4   11 EAST 129 STREET, Manhattan            HARLEM-CENTRAL
## 5  11 WEST 12TH STREET, Manhattan GREENWICH VILLAGE-CENTRAL
## 6 110 WASHINGTON PLACE, Manhattan    GREENWICH VILLAGE-WEST
##     BUILDING.CLASS.CATEGORY BLOCK LOT ZIP.CODE RESIDENTIAL.UNITS
## 1   01 ONE FAMILY DWELLINGS  1372  26    10022                 1
## 2   01 ONE FAMILY DWELLINGS  1412  68    10075                 1
## 3   01 ONE FAMILY DWELLINGS   607  50    10011                 1
## 4   01 ONE FAMILY DWELLINGS  1754   8    10035                 1
## 5   01 ONE FAMILY DWELLINGS   576  48    10011                 1
## 6 03 THREE FAMILY DWELLINGS   592  14    10014                 3
##   COMMERCIAL.UNITS TOTAL.UNITS LAND.SQUARE.FEET GROSS.SQUARE.FEET YEAR.BUILT
## 1                0           1            1,434             5,339       1920
## 2                1           2            1,839             4,344       1899
## 3                0           1            1,665             4,060       1899
## 4                0           1            1,081             2,800       1905
## 5                0           1            2,581             8,125       1910
## 6                0           3            1,575             2,646       1899
##   TAX.CLASS.AT.TIME.OF.SALE BUILDING.CLASS.AT.TIME.OF.SALE SALE.PRICE SALE.DATE
## 1                         1                             A7   13000000   10/8/19
## 2                         1                             S1    7200000   6/10/19
## 3                         1                             A4   11200000   5/22/19
## 4                         1                             A4    1550000   8/22/19
## 5                         1                             A4   19900000   6/14/19
## 6                         1                             C0    6800000   4/15/19
##        lat       lon
## 1 40.75738 -73.96031
## 2 40.77457 -73.96056
## 3 40.73577 -73.99867
## 4 40.75523 -73.96185
## 5 40.73509 -73.99506
## 6 14.27621 120.96607

An entry has really suspicious lat and lon columns, as they’re both wildly different than the others:

##            manhattan[6, ]$ADDRESS manhattan[6, ]$lat manhattan[6, ]$lon
## 1 110 WASHINGTON PLACE, Manhattan           14.27621           120.9661

Using https://www.latlong.net/convert-address-to-lat-long.html, I found the right lat and lon and manually replaced the column values.

manhattan[6,]$lat <- 40.732422
manhattan[6,]$lon <- -74.001152

# Check the min and max of lat and lon that will also be used for mapping
height <- max(manhattan$lat) - min(manhattan$lat)
width <- max(manhattan$lon) - min(manhattan$lon)
borders <- c(bottom  = min(manhattan$lat)  - 0.2 * height,
             top     = max(manhattan$lat)  + 0.2 * height,
             left    = min(manhattan$lon) - 0.2 * width,
             right   = max(manhattan$lon) + 0.2 * width)
borders
##    bottom       top      left     right 
##  40.67401  40.91134 -74.03133 -73.88918

Mapping Manhattan homes

library(lubridate)
library(ggplot2)
library(ggrepel)
library(tidyverse)
library(ggmap)
library(osmdata)

map <- get_stamenmap(borders, zoom = 11, maptype = "toner-lite")
ggmap(map) +
  geom_point(data = manhattan, mapping = aes(x = lon, y = lat,
  col = SALE.PRICE)) +
  scale_color_gradient(low = "yellow", high = "red")

I’ll be doing more data exploration once I merge the current manhattan df with the 6 other datasets. I just wanted to output one simple map before doing so. It seems that the most expensive homes are located around Central Park (Upper East Side, Upper West Side), Greenwich Village, and SoHo.

Merging 6 variable datasets

Schools

schools <- read.csv("https://raw.githubusercontent.com/lcao21/GIS3_Final_Project/master/Data/schools.csv", stringsAsFactors = FALSE)
schools$LATITUDE <- as.numeric(schools$LATITUDE)
## Warning: NAs introduced by coercion
schools$LONGITUDE <- as.numeric(schools$LONGITUDE)
## Warning: NAs introduced by coercion

Remove rows with zeros for longitude and latitude

schools <-schools[!(schools$"LONGITUDE"== 0),]
schools <- schools[with(schools, order(LONGITUDE)),]

Filter for Manhattan schools

m_districts <- c(1,2,3,4,5,6,7,8,9,10)
schools <- schools[schools$Council.district %in% m_districts, ]

I found that filtering for council districts was the most accurate way of finding schools located in Manhattan. I initally tried using community district but the community district scale here is much different than the ones I found on the NYC.gov website. According to https://council.nyc.gov/districts/, Manhattan council districts are from 1 to 10 inclusive, though district 8 represents parts of Manhattan and the Bronx.

Next, I want to determine how far each school is from a home property, but for each property, I only care about schools within a certain proximity. Furthermore, since high schools aren’t subject to school zoning laws, distance betwen home and school is weighed more than for middle and elementary schools, which are assigned based on where students’ live. Although school zoning rules probably affect proprty sales (i.e. there’s a premium on property sales near the best public high school in Manhattan), I assume that more weight is put on attending good high schools rather than middle or elementary schools, since the former imapcts college acceptances and thus its quality is more important to parents and students.

Filter out schools that don’t serve grades 9-12

hs_grades <- c("09,10,11,12")
schools <- schools[schools$Grades_text %like% hs_grades, ]

Quick map of final schools

ggmap(map) +
  geom_point(data = schools, mapping = aes(x = LONGITUDE, y = LATITUDE))

Now that I have the high schools, I want to now know how many of these schools fall within a buffer of a Manhattan home. So, I have to first create buffers around each home, and I’ve decided to create 1609.34 meter (1 mile) buffers. To do so, I had to convert the manhattan df to an sf object with st_as_sf and also transform the original EPSG:4326 CRS to the 2263 projected coordinate system (NAD83/New York Long Island). I also created a column called home_ID that gives each row (home) a unique number. Ultimately, I get an sf object called manhattan_sf_buff that has two fields: home_ID and geometry; geometry is the buffer polygon.

Create 1609.34 meter buffers around homes

library(sf)
library(rgeos)
library(rgdal)

manhattan_copy <- copy(manhattan)
coordinates(manhattan_copy) <- c("lon", "lat")
proj4string(manhattan_copy) <- CRS("+init=epsg:4326")
manhattan_sf <- st_as_sf(manhattan_copy) %>% st_transform(2263) # nad83 
manhattan_sf_buff <- st_buffer(manhattan_sf, 1609.34)
manhattan_sf_buff$home_ID <- seq.int(nrow(manhattan_sf_buff))

manhattan_sf_buff <- subset(manhattan_sf_buff, select=c("home_ID", "geometry"))

head(as.data.frame(manhattan_sf_buff))
##   home_ID                       geometry
## 1       1 POLYGON ((996855.6 215213.3...
## 2       2 POLYGON ((996782.7 221474.6...
## 3       3 POLYGON ((986228.4 207337.2...
## 4       4 POLYGON ((996430 214430.1, ...
## 5       5 POLYGON ((987229.7 207089.4...
## 6       6 POLYGON ((985540.1 206117.7...

I go through a similar process of transforming the schools dataframe of coverting it into an sf object, re-projecting it to the EPSG:2263 projected coordinate system, and creating a column of unique, integer IDs. Ultimately, I get an sf object called schools_sf that has two fields: school_ID and geometry; geometry is of point geometry and represents each school.

schools_copy <- copy(schools)
coordinates(schools_copy) <- c("LONGITUDE", "LATITUDE")
proj4string(schools_copy) <- CRS("+init=epsg:4326")
schools_sf <- st_as_sf(schools_copy) %>% st_transform(2263)
schools_sf$school_ID <- seq.int(nrow(schools_sf))
schools_sf <- subset(schools_sf, select=c("school_ID", "geometry"))

head(as.data.frame(schools_sf))
##      school_ID                  geometry
## 1079         1 POINT (978997.2 190477.9)
## 1080         2 POINT (978997.2 190477.9)
## 757          3 POINT (979410.2 196631.7)
## 1113         4 POINT (979859.1 196347.8)
## 1029         5   POINT (980391 200872.7)
## 911          6 POINT (980561.6 196377.9)

Plot of all homes and schools

plot(manhattan_sf_buff$geometry, col="Blue")
plot(st_geometry(schools_sf), pch = 3, col = "Red", add= TRUE)

To calculate the intersections between the homes and high schools, I use the st_intersection() function from the sf library.

Calculate intersection

intersection <- st_intersection(x = manhattan_sf_buff, y = schools_sf)
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries

By plotting the homes and the intersections, we can see a clear decrease in the number of high schools, telling us that only certain schools are within 1609.34 meter buffers of a home.

Plot of all homes and only schools that intersect

plot(manhattan_sf_buff$geometry, col="Blue")
plot(intersection, col = "Red", pch = 3, add = TRUE)
## Warning in plot.sf(intersection, col = "Red", pch = 3, add = TRUE): ignoring all
## but the first attribute

Calculate the number of schools within the buffer of each home

int_result <- intersection %>% 
  group_by(home_ID) %>% 
  count()

head(int_result)
## Simple feature collection with 6 features and 2 fields
## geometry type:  GEOMETRY
## dimension:      XY
## bbox:           xmin: 982544.1 ymin: 203016.9 xmax: 999668.1 ymax: 236294.8
## CRS:            EPSG:2263
## # A tibble: 6 x 3
## # Groups:   home_ID [6]
##   home_ID     n                                                         geometry
##     <int> <int>                                      <GEOMETRY [US_survey_foot]>
## 1       3     2                                        POINT (985433.1 207572.8)
## 2       5     3              MULTIPOINT ((985433.1 207572.8), (986265.6 207613))
## 3       8     1                                        POINT (993448.1 229696.9)
## 4      10     5 MULTIPOINT ((982544.1 204889.9), (982848 203278.8), (982949.5 2…
## 5      11     4 MULTIPOINT ((998395 234567.8), (998833.5 235072), (999474.2 236…
## 6      12     4                                        POINT (991316.2 225687.7)

Now we have to left join the manhattan dataframe with int_result. First, I created the same column of unique, integer ID for manhattan so that I can join the two dataframes with the shared home_ID column. After the merge, homes that don’t have schools within its buffer have NA values in the “n” column so I replace those with zeros and rename the column num_schools. I call this final dataframe manhattan_int_schools.

Left join manhattan with int_result

manhattan$home_ID <- seq.int(nrow(manhattan))
manhattan_int_schools <- merge(x = manhattan, y = int_result, by = "home_ID", all.x = TRUE)
manhattan_int_schools$n[is.na(manhattan_int_schools$n)] <- 0
colnames(manhattan_int_schools)[20] <- "num_schools"

head(manhattan_int_schools)
##   home_ID                         ADDRESS              NEIGHBORHOOD
## 1       1       1 SUTTON PLACE, Manhattan              MIDTOWN EAST
## 2       2 106 EAST 78TH STREET, Manhattan   UPPER EAST SIDE (59-79)
## 3       3 109 WEST 11TH STREET, Manhattan    GREENWICH VILLAGE-WEST
## 4       4   11 EAST 129 STREET, Manhattan            HARLEM-CENTRAL
## 5       5  11 WEST 12TH STREET, Manhattan GREENWICH VILLAGE-CENTRAL
## 6       6 110 WASHINGTON PLACE, Manhattan    GREENWICH VILLAGE-WEST
##     BUILDING.CLASS.CATEGORY BLOCK LOT ZIP.CODE RESIDENTIAL.UNITS
## 1   01 ONE FAMILY DWELLINGS  1372  26    10022                 1
## 2   01 ONE FAMILY DWELLINGS  1412  68    10075                 1
## 3   01 ONE FAMILY DWELLINGS   607  50    10011                 1
## 4   01 ONE FAMILY DWELLINGS  1754   8    10035                 1
## 5   01 ONE FAMILY DWELLINGS   576  48    10011                 1
## 6 03 THREE FAMILY DWELLINGS   592  14    10014                 3
##   COMMERCIAL.UNITS TOTAL.UNITS LAND.SQUARE.FEET GROSS.SQUARE.FEET YEAR.BUILT
## 1                0           1            1,434             5,339       1920
## 2                1           2            1,839             4,344       1899
## 3                0           1            1,665             4,060       1899
## 4                0           1            1,081             2,800       1905
## 5                0           1            2,581             8,125       1910
## 6                0           3            1,575             2,646       1899
##   TAX.CLASS.AT.TIME.OF.SALE BUILDING.CLASS.AT.TIME.OF.SALE SALE.PRICE SALE.DATE
## 1                         1                             A7   13000000   10/8/19
## 2                         1                             S1    7200000   6/10/19
## 3                         1                             A4   11200000   5/22/19
## 4                         1                             A4    1550000   8/22/19
## 5                         1                             A4   19900000   6/14/19
## 6                         1                             C0    6800000   4/15/19
##        lat       lon num_schools                       geometry
## 1 40.75738 -73.96031           0       GEOMETRYCOLLECTION EMPTY
## 2 40.77457 -73.96056           0       GEOMETRYCOLLECTION EMPTY
## 3 40.73577 -73.99867           2      POINT (985433.1 207572.8)
## 4 40.75523 -73.96185           0       GEOMETRYCOLLECTION EMPTY
## 5 40.73509 -73.99506           3 MULTIPOINT ((985433.1 20757...
## 6 40.73242 -74.00115           0       GEOMETRYCOLLECTION EMPTY

Map of only homes and schools that intersect

# Schools that intersect with homes 
schools_that_int <- as.data.frame(st_transform(intersection, crs = 4326))
schools_that_int <- schools_that_int[!duplicated(schools_that_int[,c("school_ID")]),]

schools$school_ID <- seq.int(nrow(schools))

test2 <- merge(schools, schools_that_int)
head(test2) 
##   school_ID fiscal_year system_code location_code
## 1         5        2020      02M475          M475
## 2        13        2020      75M721          M721
## 3        14        2020      75M721          M721
## 4        15        2020      02M376          M376
## 5        16        2020      02M615          M615
## 6        17        2020      84M522          M522
##                                        location_name         BEDS
## 1                             Stuyvesant High School 310200011475
## 2 P.S. M721 - Manhattan Occupational Training Center 307500011721
## 3 P.S. M721 - Manhattan Occupational Training Center 307500011721
## 4                                        NYC iSchool 310200011376
## 5 Chelsea Career and Technical Education High School 310200011615
## 6               Broome Street Academy Charter School 310200860992
##   Managed_by_name location_type_description Location_Category_Description
## 1             DOE          General Academic                   High school
## 2             DOE         Special Education              Secondary School
## 3             DOE         Special Education              Secondary School
## 4             DOE          General Academic                   High school
## 5             DOE          Career Technical                   High school
## 6         Charter          General Academic                   High school
##               Grades_text Grades_final_text               open_date
## 1             09,10,11,12       09,10,11,12 1996-06-05T00:00:00.000
## 2 06,07,08,09,10,11,12,SE    09,10,11,12,SE 1990-12-04T00:00:00.000
## 3 06,07,08,09,10,11,12,SE    09,10,11,12,SE 1990-12-04T00:00:00.000
## 4          09,10,11,12,SE       09,10,11,12 2008-07-01T00:00:00.000
## 5          09,10,11,12,SE       09,10,11,12 1905-07-01T00:00:00.000
## 6          09,10,11,12,SE       09,10,11,12 2011-07-01T00:00:00.000
##   Status_descriptions Primary_building_code     primary_address_line_1
## 1                Open                  M477        345 CHAMBERS STREET
## 2                Open                  M641    250 WEST HOUSTON STREET
## 3                Open                  M641    250 WEST HOUSTON STREET
## 4                Open                  M615 131 AVENUE OF THE AMERICAS
## 5                Open                  M615 131 AVENUE OF THE AMERICAS
## 6                Open                  MBDF 121 AVENUE OF THE AMERICAS
##   State_code X_COORDINATE Y_COORDINATE LONGITUDE LATITUDE Community_district
## 1         NY       980391       200873 -74.01392 40.71802                101
## 2         NY       982544       204890 -74.00616 40.72905                102
## 3         NY       982544       204890 -74.00616 40.72905                102
## 4         NY       982848       203279 -74.00506 40.72463                102
## 5         NY       982848       203279 -74.00506 40.72463                102
## 6         NY       982950       203014 -74.00469 40.72391                102
##   Council.district Census_tract Borough_block_lot  NTA
## 1                1        31703        1000160215 MN25
## 2                3         6700        1005810054 MN23
## 3                3         6700        1005810054 MN23
## 4                3         3700        1004910016 MN24
## 5                3         3700        1004910016 MN24
## 6                3         3700                 0 MN24
##                                 NTA_Name    Principal_Name Principal_title
## 1      Battery Park City-Lower Manhattan    Eric Contreras       Principal
## 2                           West Village      SHOLOM FRIED       Principal
## 3                           West Village      SHOLOM FRIED       Principal
## 4 SoHo-TriBeCa-Civic Center-Little Italy      Isora Bailey       Principal
## 5 SoHo-TriBeCa-Civic Center-Little Italy     Jaivelle Reed       Principal
## 6 SoHo-TriBeCa-Civic Center-Little Italy Melissa Silberman       Principal
##   Principal_phone_number   fax_number Geographical_District_code
## 1           212-312-4800 212-587-3874                          2
## 2           212-675-7926 212-255-3227                          2
## 3           212-675-7926 212-255-3227                          2
## 4           917-237-7300 212-219-0743                          2
## 5           212-925-1080 212-941-7934                          2
## 6           212-453-0295 212-966-7253                          2
##   Administrative_District_Code Administrative_District_Name
## 1                            2                         NULL
## 2                           75   CITYWIDE SPECIAL EDUCATION
## 3                           75   CITYWIDE SPECIAL EDUCATION
## 4                            2                         NULL
## 5                            2                         NULL
## 6                           84    OFFICE OF CHARTER SCHOOLS
##   community_school_sup_name      Tier_3_Support_Location_Name
## 1                      NULL NYCDOE Borough Office - Manhattan
## 2        LOUISSAINT, KETLER                  D75 CITYWIDE BCO
## 3        LOUISSAINT, KETLER                  D75 CITYWIDE BCO
## 4                      NULL NYCDOE Borough Office - Manhattan
## 5                      NULL NYCDOE Borough Office - Manhattan
## 6                      NULL                              NULL
##   Tier_3_Support_Leader_Name     Tier_2_Support_Location_Name
## 1                  CHU, YUET School Support Team 1- Manhattan
## 2            Tillman Roberto       Children First Network 755
## 3            Tillman Roberto       Children First Network 755
## 4                  CHU, YUET School Support Team 1- Manhattan
## 5                  CHU, YUET School Support Team 1- Manhattan
## 6                       NULL                             NULL
##   HighSchool_Network_Location_Code             HighSchool_Network_Name
## 1                             HS04 HS Network 04 Superintendent Office
## 2                             NULL                                NULL
## 3                             NULL                                NULL
## 4                             HS04 HS Network 04 Superintendent Office
## 5                             HS04 HS Network 04 Superintendent Office
## 6                             NULL                                NULL
##   HighSchool_Network_Superintendent Community_district.1 Police_precinct
## 1                     ORLEN, VIVIAN                  101               1
## 2                              NULL                  102               6
## 3                              NULL                  102               6
## 4                     ORLEN, VIVIAN                  102               1
## 5                     ORLEN, VIVIAN                  102               1
## 6                              NULL                  102               1
##   home_ID                   geometry
## 1     130 POINT (-74.01392 40.71802)
## 2      10 POINT (-74.00616 40.72905)
## 3      10 POINT (-74.00616 40.72905)
## 4      10 POINT (-74.00506 40.72463)
## 5      10 POINT (-74.00506 40.72463)
## 6      10 POINT (-74.00469 40.72391)
# Homes with at least 1 school in its buffer
test3 <- manhattan_int_schools[manhattan_int_schools$num_schools > 0,]
head(test3)
##    home_ID                          ADDRESS              NEIGHBORHOOD
## 3        3  109 WEST 11TH STREET, Manhattan    GREENWICH VILLAGE-WEST
## 5        5   11 WEST 12TH STREET, Manhattan GREENWICH VILLAGE-CENTRAL
## 8        8  115 MANHATTAN AVENUE, Manhattan          MANHATTAN VALLEY
## 10      10   116 SULLIVAN STREET, Manhattan                      SOHO
## 11      11 116 WEST 130TH STREET, Manhattan            HARLEM-CENTRAL
## 12      12  116 WEST 80TH STREET, Manhattan   UPPER WEST SIDE (79-96)
##      BUILDING.CLASS.CATEGORY BLOCK LOT ZIP.CODE RESIDENTIAL.UNITS
## 3    01 ONE FAMILY DWELLINGS   607  50    10011                 1
## 5    01 ONE FAMILY DWELLINGS   576  48    10011                 1
## 8    02 TWO FAMILY DWELLINGS  1840 152    10025                 2
## 10   02 TWO FAMILY DWELLINGS   504  29    10012                 2
## 11 03 THREE FAMILY DWELLINGS  1914  41    10027                 3
## 12   01 ONE FAMILY DWELLINGS  1210 142    10024                 1
##    COMMERCIAL.UNITS TOTAL.UNITS LAND.SQUARE.FEET GROSS.SQUARE.FEET YEAR.BUILT
## 3                 0           1            1,665             4,060       1899
## 5                 0           1            2,581             8,125       1910
## 8                 0           2              900             3,600       1899
## 10                0           2            2,350             5,446       1900
## 11                0           3            1,665             3,476       1910
## 12                0           1            1,839             5,520       1910
##    TAX.CLASS.AT.TIME.OF.SALE BUILDING.CLASS.AT.TIME.OF.SALE SALE.PRICE
## 3                          1                             A4   11200000
## 5                          1                             A4   19900000
## 8                          1                             B1         10
## 10                         1                             B1   13450000
## 11                         1                             C0    2300000
## 12                         1                             A4    4675000
##    SALE.DATE      lat       lon num_schools                       geometry
## 3    5/22/19 40.73577 -73.99867           2      POINT (985433.1 207572.8)
## 5    6/14/19 40.73509 -73.99506           3 MULTIPOINT ((985433.1 20757...
## 8    2/11/20 40.79783 -73.96233           1      POINT (993448.1 229696.9)
## 10    3/2/20 40.72613 -74.00286           5 MULTIPOINT ((982544.1 20488...
## 11   4/19/19 40.81115 -73.94412           4 MULTIPOINT ((998395 234567....
## 12    8/2/19 40.78288 -73.97604           4      POINT (991316.2 225687.7)
map2 <- get_stamenmap(borders, zoom = 11, maptype = "terrain")

ggmap(map2) +
  geom_point(data = test2, mapping = aes(x = LONGITUDE, y = LATITUDE), col="Red", shape=3) + # Schools
  geom_point(data = test3, mapping = aes(x = lon, y = lat), col="Blue") # Homes

Healthy Stores

stores <- read.csv("https://raw.githubusercontent.com/lcao21/GIS3_Final_Project/master/Data/healthystores.csv", stringsAsFactors = FALSE)

Remove rows with zeros for longitude and latitude

stores <-stores[!is.na(stores$Longitude), ]

Filter for New York stores

stores <- stores[stores$Borough %like% "New York", ]

After removing stores without latitude and longitude values and aren’t located in the New York borough, I’m left with 68 stores.

Quick map of final stores

ggmap(map) +
  geom_point(data = stores, mapping = aes(x = Longitude, y = Latitude))

It seems that the dataset for grocery stores in New York are all clustered in the Upper East Side. My dataset for Manhattan homes, however, is spread throughout Manhattan so I expect that a lot of homes will have zero stores in their buffers. So, I decided to combine stores with data on farmers markets.

The farmers market data, as shown below, is spread out more evently than stores.

Clean farmers markets data and map it

farmers <- read.csv("https://raw.githubusercontent.com/lcao21/GIS3_Final_Project/master/Data/farmersmarkets.csv", stringsAsFactors = FALSE)
farmers <-farmers[!is.na(farmers$Longitude), ]
farmers <- farmers[farmers$Borough %like% "Manhattan", ]

ggmap(map) +
  geom_point(data = farmers, mapping = aes(x = Longitude, y = Latitude))

The final merged dataframe is called food and has four columns: name, street_address, latitude, and longitude.

Combine stores and farmers dataframes

food_names <- c(stores$Store.Name, farmers$Market.Name)

stores$street_address <- paste(stores$Street.Address, stores$Address, sep=" ")
food_addresses <- c(stores$street_address, farmers$Street.Address)

food_lat <- c(stores$Latitude, farmers$Latitude)

food_lon <- c(stores$Longitude, farmers$Longitude)

food <- data.frame("name" = food_names, "street_address" = food_addresses, "latitude" = food_lat, "longitude" = food_lon) 

Quick map of healthy grocery vendors

ggmap(map) +
  geom_point(data = food, mapping = aes(x = longitude, y = latitude))

Since I already created an sf object with 1609.34 meter buffers for each home–manhattan_sf_buff, I can use it again here. Now, I have to transform the food dataframe into an sf object so that I can use st_intersection() between the two.

foods_copy <- copy(food)
coordinates(foods_copy) <- c("longitude", "latitude")
proj4string(foods_copy) <- CRS("+init=epsg:4326")
food_sf <- st_as_sf(foods_copy) %>% st_transform(2263)
food_sf$food_ID <- seq.int(nrow(food_sf))
food_sf <- subset(food_sf, select=c("food_ID", "geometry"))

head(as.data.frame(food_sf))
##   food_ID                  geometry
## 1       1 POINT (999869.1 230322.1)
## 2       2  POINT (1000307 230058.2)
## 3       3  POINT (1000599 228063.3)
## 4       4    POINT (1000611 228095)
## 5       5  POINT (1001628 229567.3)
## 6       6  POINT (1000484 227856.3)

Plot of all homes and food vendors

plot(manhattan_sf_buff$geometry, col="Blue")
plot(st_geometry(food_sf), pch = 3, col = "Green", add= TRUE)

Calculate intersection

food_intersection <- st_intersection(x = manhattan_sf_buff, y = food_sf)
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries

Plot of all homes and only food vendors that intersect

plot(manhattan_sf_buff$geometry, col="Blue")
plot(food_intersection, col = "Green", pch = 3, add = TRUE)
## Warning in plot.sf(food_intersection, col = "Green", pch = 3, add = TRUE):
## ignoring all but the first attribute

Calculate the number of food vendors within the buffer of each home

food_int_result <- food_intersection %>% 
  group_by(home_ID) %>% 
  count()

head(food_int_result)
## Simple feature collection with 6 features and 2 fields
## geometry type:  GEOMETRY
## dimension:      XY
## bbox:           xmin: 986930.1 ymin: 207834.2 xmax: 1000676 ymax: 235975.3
## CRS:            EPSG:2263
## # A tibble: 6 x 3
## # Groups:   home_ID [6]
##   home_ID     n                                                         geometry
##     <int> <int>                                      <GEOMETRY [US_survey_foot]>
## 1       5     1                                        POINT (986930.1 207834.2)
## 2       8     1                                        POINT (995422.1 231161.9)
## 3       9     6 MULTIPOINT ((997335.9 226796), (998309 226256.2), (998421 22664…
## 4      11     2               MULTIPOINT ((998555.1 234012), (1000676 235975.3))
## 5      12     1                                          POINT (990898 223916.9)
## 6      19    10 MULTIPOINT ((997335.9 226796), (998309 226256.2), (998421 22664…

Now I left joined manhattan_int_schools with food_int_result, using home_ID as the mutual column, and renamed the dataframe manhattan_ints because all the dependent variable columns will be in here or eventually merged into it. For rows that have NA values for the column of food vendor counts, I replaced them with zeros.

manhattan_ints <- merge(x = manhattan_int_schools, y = food_int_result, by = "home_ID", all.x = TRUE)
manhattan_ints$n[is.na(manhattan_ints$n)] <- 0
colnames(manhattan_ints)[22] <- "num_food"

head(manhattan_ints)
##   home_ID                         ADDRESS              NEIGHBORHOOD
## 1       1       1 SUTTON PLACE, Manhattan              MIDTOWN EAST
## 2       2 106 EAST 78TH STREET, Manhattan   UPPER EAST SIDE (59-79)
## 3       3 109 WEST 11TH STREET, Manhattan    GREENWICH VILLAGE-WEST
## 4       4   11 EAST 129 STREET, Manhattan            HARLEM-CENTRAL
## 5       5  11 WEST 12TH STREET, Manhattan GREENWICH VILLAGE-CENTRAL
## 6       6 110 WASHINGTON PLACE, Manhattan    GREENWICH VILLAGE-WEST
##     BUILDING.CLASS.CATEGORY BLOCK LOT ZIP.CODE RESIDENTIAL.UNITS
## 1   01 ONE FAMILY DWELLINGS  1372  26    10022                 1
## 2   01 ONE FAMILY DWELLINGS  1412  68    10075                 1
## 3   01 ONE FAMILY DWELLINGS   607  50    10011                 1
## 4   01 ONE FAMILY DWELLINGS  1754   8    10035                 1
## 5   01 ONE FAMILY DWELLINGS   576  48    10011                 1
## 6 03 THREE FAMILY DWELLINGS   592  14    10014                 3
##   COMMERCIAL.UNITS TOTAL.UNITS LAND.SQUARE.FEET GROSS.SQUARE.FEET YEAR.BUILT
## 1                0           1            1,434             5,339       1920
## 2                1           2            1,839             4,344       1899
## 3                0           1            1,665             4,060       1899
## 4                0           1            1,081             2,800       1905
## 5                0           1            2,581             8,125       1910
## 6                0           3            1,575             2,646       1899
##   TAX.CLASS.AT.TIME.OF.SALE BUILDING.CLASS.AT.TIME.OF.SALE SALE.PRICE SALE.DATE
## 1                         1                             A7   13000000   10/8/19
## 2                         1                             S1    7200000   6/10/19
## 3                         1                             A4   11200000   5/22/19
## 4                         1                             A4    1550000   8/22/19
## 5                         1                             A4   19900000   6/14/19
## 6                         1                             C0    6800000   4/15/19
##        lat       lon num_schools                     geometry.x num_food
## 1 40.75738 -73.96031           0       GEOMETRYCOLLECTION EMPTY        0
## 2 40.77457 -73.96056           0       GEOMETRYCOLLECTION EMPTY        0
## 3 40.73577 -73.99867           2      POINT (985433.1 207572.8)        0
## 4 40.75523 -73.96185           0       GEOMETRYCOLLECTION EMPTY        0
## 5 40.73509 -73.99506           3 MULTIPOINT ((985433.1 20757...        1
## 6 40.73242 -74.00115           0       GEOMETRYCOLLECTION EMPTY        0
##                  geometry.y
## 1  GEOMETRYCOLLECTION EMPTY
## 2  GEOMETRYCOLLECTION EMPTY
## 3  GEOMETRYCOLLECTION EMPTY
## 4  GEOMETRYCOLLECTION EMPTY
## 5 POINT (986930.1 207834.2)
## 6  GEOMETRYCOLLECTION EMPTY

Map of only homes and food vendors that intersect

# Food vendors that intersect with homes 
food_that_int <- as.data.frame(st_transform(food_intersection, crs = 4326))
food_that_int <- food_that_int[!duplicated(food_that_int[,c("food_ID")]),]

food$food_ID <- seq.int(nrow(food))

food_merged <- merge(food, food_that_int)
head(food_merged) 
##   food_ID                              name        street_address latitude
## 1       1 Comidas Dominicanas Capellan Deli      1626 Park Avenue 40.79884
## 2       5                    Juice Bar Deli 308 East 116th Street 40.79677
## 3       9                El Tepeyac Grocery 1621 Lexington Avenue 40.78992
## 4      13                   JJ Food  Market  153 East 99th Street 40.78769
## 5      14                    Deli Mini Mart 339 East 115th Street 40.79565
## 6      15               Cherry Deli Gourmet 1575 Lexington Avenue 40.78874
##   longitude home_ID                   geometry
## 1 -73.94359     156 POINT (-73.94359 40.79884)
## 2 -73.93724      93 POINT (-73.93724 40.79677)
## 3 -73.94796      19 POINT (-73.94796 40.78992)
## 4 -73.94923       9 POINT (-73.94923 40.78769)
## 5 -73.93670      93  POINT (-73.9367 40.79565)
## 6 -73.94882       9 POINT (-73.94882 40.78874)
# Homes with at least 1 food vendor in its buffer
homes_with_food <- manhattan_ints[manhattan_ints$num_food > 0,]
head(homes_with_food)
##    home_ID                          ADDRESS              NEIGHBORHOOD
## 5        5   11 WEST 12TH STREET, Manhattan GREENWICH VILLAGE-CENTRAL
## 8        8  115 MANHATTAN AVENUE, Manhattan          MANHATTAN VALLEY
## 9        9  116 EAST 95TH STREET, Manhattan   UPPER EAST SIDE (79-96)
## 11      11 116 WEST 130TH STREET, Manhattan            HARLEM-CENTRAL
## 12      12  116 WEST 80TH STREET, Manhattan   UPPER WEST SIDE (79-96)
## 19      19      1255 PARK AVENUE, Manhattan               HARLEM-EAST
##      BUILDING.CLASS.CATEGORY BLOCK LOT ZIP.CODE RESIDENTIAL.UNITS
## 5    01 ONE FAMILY DWELLINGS   576  48    10011                 1
## 8    02 TWO FAMILY DWELLINGS  1840 152    10025                 2
## 9    01 ONE FAMILY DWELLINGS  1523  68    10128                 1
## 11 03 THREE FAMILY DWELLINGS  1914  41    10027                 3
## 12   01 ONE FAMILY DWELLINGS  1210 142    10024                 1
## 19   02 TWO FAMILY DWELLINGS  1625   1    10029                 2
##    COMMERCIAL.UNITS TOTAL.UNITS LAND.SQUARE.FEET GROSS.SQUARE.FEET YEAR.BUILT
## 5                 0           1            2,581             8,125       1910
## 8                 0           2              900             3,600       1899
## 9                 0           1            2,013             4,160       1899
## 11                0           3            1,665             3,476       1910
## 12                0           1            1,839             5,520       1910
## 19                0           2              847             1,650       1920
##    TAX.CLASS.AT.TIME.OF.SALE BUILDING.CLASS.AT.TIME.OF.SALE SALE.PRICE
## 5                          1                             A4   19900000
## 8                          1                             B1         10
## 9                          1                             A9    8000000
## 11                         1                             C0    2300000
## 12                         1                             A4    4675000
## 19                         1                             B1     906500
##    SALE.DATE      lat       lon num_schools                     geometry.x
## 5    6/14/19 40.73509 -73.99506           3 MULTIPOINT ((985433.1 20757...
## 8    2/11/20 40.79783 -73.96233           1      POINT (993448.1 229696.9)
## 9    3/20/20 40.78548 -73.95259           0       GEOMETRYCOLLECTION EMPTY
## 11   4/19/19 40.81115 -73.94412           4 MULTIPOINT ((998395 234567....
## 12    8/2/19 40.78288 -73.97604           4      POINT (991316.2 225687.7)
## 19    2/7/20 40.78720 -73.95172           0       GEOMETRYCOLLECTION EMPTY
##    num_food                     geometry.y
## 5         1      POINT (986930.1 207834.2)
## 8         1      POINT (995422.1 231161.9)
## 9         6 MULTIPOINT ((997335.9 22679...
## 11        2 MULTIPOINT ((998555.1 23401...
## 12        1        POINT (990898 223916.9)
## 19       10 MULTIPOINT ((997335.9 22679...
ggmap(map2) +
  geom_point(data = food_merged, mapping = aes(x = longitude, y = latitude), col="Green", shape=3) + # Food 
  geom_point(data = homes_with_food, mapping = aes(x = lon, y = lat), col="Blue") # Homes

I repeat the whole process again but for the parks dataset this time. Unlike schools, stores, and farmers’ markets, however, parks aren’t represent as points but rather as polygons, and I loaded them in as geojson file.

Parks

library(geojsonsf)
parks <-geojson_sf("parks.geojson")
parks$park_ID <- seq.int(nrow(parks))
head(as.data.frame(parks))
##   system    shape_leng    shape_area parknum           landuse
## 1   <NA> 829.420106841 41539.8013629    B021    Community Park
## 2   <NA> 607.555193338 21960.4949197    B222 Neighborhood Park
## 3   <NA>  784.42199003 28079.0762647    B326 Neighborhood Park
## 4   <NA> 2672.71014961 438416.114511    B021    Community Park
## 5   <NA> 1376.10639843  14465.699799  B223DG    Triangle/Plaza
## 6   <NA> 1218.55982108 54510.4377633   B223K           Parkway
##                    park_name sub_code feat_code    status   source_id
## 1       Commodore Barry Park   491050      4910   Updated 21491000001
## 2      Pierrepont Playground   498000      4980 Unchanged 21498000002
## 3           Cobble Hill Park   498000      4980 Unchanged 21498000003
## 4       Commodore Barry Park   498000      4980 Unchanged 21498000004
## 5 Brooklyn Heights Promenade   498000      4980 Unchanged 21498000005
## 6               Trinity Park   498000      4980 Unchanged 21498000015
##                         geometry park_ID
## 1 MULTIPOLYGON (((-73.97922 4...       1
## 2 MULTIPOLYGON (((-73.99732 4...       2
## 3 MULTIPOLYGON (((-73.99557 4...       3
## 4 MULTIPOLYGON (((-73.97747 4...       4
## 5 MULTIPOLYGON (((-73.99747 4...       5
## 6 MULTIPOLYGON (((-73.98416 4...       6

There isn’t a column for borough, so I can’t filter for only parks in Manhattan. Nevertheless, this shouldn’t be a problem once I calculate the intersections. First, I have to re-project the parks data.

Re-project parks so that it has the same crs (2263) as the homes

parks_copy <- copy(parks)
parks_sf <- st_transform(parks_copy, crs = 2263)

Plot all homes and parks

plot(manhattan_sf_buff$geometry, col="Blue")
plot(st_geometry(parks_sf), col = "darkseagreen1", add= TRUE)

Calculate intersection

parks_intersection <- st_intersection(x = manhattan_sf_buff, y = parks_sf)
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries

Plot of all homes and only parks that intersect

plot(manhattan_sf_buff$geometry, col="Blue")
plot(parks_intersection, col = "darkseagreen1", add = TRUE)
## Warning in plot.sf(parks_intersection, col = "darkseagreen1", add = TRUE):
## ignoring all but the first attribute

Calculate the number of parks within the buffer of each home

parks_int_result <- parks_intersection %>% 
  group_by(home_ID) %>% 
  count()

head(parks_int_result)
## Simple feature collection with 6 features and 2 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 982567.6 ymin: 204537.8 xmax: 996362.5 ymax: 222983.7
## CRS:            EPSG:2263
## # A tibble: 6 x 3
## # Groups:   home_ID [6]
##   home_ID     n                                                         geometry
##     <int> <int>                                  <MULTIPOLYGON [US_survey_foot]>
## 1       1    18 (((994393.8 213848.9, 994369.7 213863.6, 994359 213870.9, 99440…
## 2       2    14 (((994251.7 220156, 994232.2 220169.3, 994234.2 220172.9, 99423…
## 3       3    20 (((985002 205774.6, 984953.7 205763, 984870.8 205747.7, 984787.…
## 4       4    13 (((993976 213408.1, 993977.7 213405.8, 993979.3 213403.4, 99398…
## 5       5     5 (((985441.6 205490.2, 985368.6 205499.8, 985285.8 205515.2, 985…
## 6       6    41 (((983626.9 204537.8, 983605.5 204541.8, 983576.8 204559.4, 983…

The counts seem really high but if we look at parks_intersection, we see that some of the parks are being counted multiple times for the same parks. This is because the original dataset assigns different shape_area, system, source_id, parknum, and shape_leng columns to some of the same parks. So even after applying the unique() function, I get the same rows.

nrow(as.data.frame(unique(parks_intersection))) == nrow(as.data.frame(parks_intersection))
## [1] TRUE

I add the counts of parks that intersect with home buffers to the main dataframe: manhattan_ints.

Left join manhattan_ints with parks_int_result

manhattan_ints <- merge(x = manhattan_ints, y = parks_int_result, by = "home_ID", all.x = TRUE)
manhattan_ints$n[is.na(manhattan_ints$n)] <- 0
colnames(manhattan_ints)[24] <- "num_parks"
head(manhattan_ints)
##   home_ID                         ADDRESS              NEIGHBORHOOD
## 1       1       1 SUTTON PLACE, Manhattan              MIDTOWN EAST
## 2       2 106 EAST 78TH STREET, Manhattan   UPPER EAST SIDE (59-79)
## 3       3 109 WEST 11TH STREET, Manhattan    GREENWICH VILLAGE-WEST
## 4       4   11 EAST 129 STREET, Manhattan            HARLEM-CENTRAL
## 5       5  11 WEST 12TH STREET, Manhattan GREENWICH VILLAGE-CENTRAL
## 6       6 110 WASHINGTON PLACE, Manhattan    GREENWICH VILLAGE-WEST
##     BUILDING.CLASS.CATEGORY BLOCK LOT ZIP.CODE RESIDENTIAL.UNITS
## 1   01 ONE FAMILY DWELLINGS  1372  26    10022                 1
## 2   01 ONE FAMILY DWELLINGS  1412  68    10075                 1
## 3   01 ONE FAMILY DWELLINGS   607  50    10011                 1
## 4   01 ONE FAMILY DWELLINGS  1754   8    10035                 1
## 5   01 ONE FAMILY DWELLINGS   576  48    10011                 1
## 6 03 THREE FAMILY DWELLINGS   592  14    10014                 3
##   COMMERCIAL.UNITS TOTAL.UNITS LAND.SQUARE.FEET GROSS.SQUARE.FEET YEAR.BUILT
## 1                0           1            1,434             5,339       1920
## 2                1           2            1,839             4,344       1899
## 3                0           1            1,665             4,060       1899
## 4                0           1            1,081             2,800       1905
## 5                0           1            2,581             8,125       1910
## 6                0           3            1,575             2,646       1899
##   TAX.CLASS.AT.TIME.OF.SALE BUILDING.CLASS.AT.TIME.OF.SALE SALE.PRICE SALE.DATE
## 1                         1                             A7   13000000   10/8/19
## 2                         1                             S1    7200000   6/10/19
## 3                         1                             A4   11200000   5/22/19
## 4                         1                             A4    1550000   8/22/19
## 5                         1                             A4   19900000   6/14/19
## 6                         1                             C0    6800000   4/15/19
##        lat       lon num_schools                     geometry.x num_food
## 1 40.75738 -73.96031           0       GEOMETRYCOLLECTION EMPTY        0
## 2 40.77457 -73.96056           0       GEOMETRYCOLLECTION EMPTY        0
## 3 40.73577 -73.99867           2      POINT (985433.1 207572.8)        0
## 4 40.75523 -73.96185           0       GEOMETRYCOLLECTION EMPTY        0
## 5 40.73509 -73.99506           3 MULTIPOINT ((985433.1 20757...        1
## 6 40.73242 -74.00115           0       GEOMETRYCOLLECTION EMPTY        0
##                  geometry.y num_parks                       geometry
## 1  GEOMETRYCOLLECTION EMPTY        18 MULTIPOLYGON (((994393.8 21...
## 2  GEOMETRYCOLLECTION EMPTY        14 MULTIPOLYGON (((994251.7 22...
## 3  GEOMETRYCOLLECTION EMPTY        20 MULTIPOLYGON (((985002 2057...
## 4  GEOMETRYCOLLECTION EMPTY        13 MULTIPOLYGON (((993976 2134...
## 5 POINT (986930.1 207834.2)         5 MULTIPOLYGON (((985441.6 20...
## 6  GEOMETRYCOLLECTION EMPTY        41 MULTIPOLYGON (((983626.9 20...